# encoding: utf-8

import DataExtract.UnstructuredData as TextProcess
import DataExtract.structuredData as tableProcess
import Util.file as fileOperator

if __name__ == '__main__':
	# Script entry point: derive an output directory next to the input HTML
	# file, create it, and run table extraction on the input.
	# Local import keeps this script block self-contained.
	import os

	# Raw string: the Windows path is full of backslashes that are not meant
	# as escape sequences, so nothing in it must be interpreted (including
	# the "\0" in front of the file name).
	inputfile = r'D:\项目\荣大语义分析\数据及描述-草稿\测试数据\标注\0in.htm'
	# Strip only the final extension; splitting on the first "." would
	# truncate the path if any directory name contained a dot.
	outputfile = os.path.splitext(inputfile)[0] + "_out\\"
	# NOTE(review): presumably creates the output directory - confirm in Util.file.
	fileOperator.mkdir(outputfile)
	# 1.Text Process (currently disabled)
	# TextProcess.read_html(inputfile,outputfile+"content.txt")
	# TextProcess.entityIdentify(outputfile+"content.txt",outputfile+"entityIdentify.txt")
	# 2.Table Process
	tableProcess.extractTable(inputfile, outputfile)